Installing and loading our package
To get an overview of the abstraction levels and chemical diversity, sunburst plots can be drawn.
To speed up Random Forest analyses, the use of parallel processing is recommended.
library(parallel)
library(e1071)
library(doMC)
#> Loading required package: foreach
#> Loading required package: iterators
# Number of physical CPU cores to hand to the parallel backend.
# detectCores() is documented to return NA when the count cannot be
# determined, so fall back to a single worker in that case.
nSlaves <- detectCores(all.tests=FALSE, logical=FALSE)
if (is.na(nSlaves)) {
  nSlaves <- 1L
}
registerDoMC(nSlaves)
First, we demonstrate the use of PLS-DA to select essential variables.
# PLS-DA feature selection on the comp_list matrix, grouped by species and
# using one component fewer than there are species levels. Warnings raised
# during the call are silenced.
sel_pls_comp_list <- suppressWarnings(
  select_features_pls(
    feat_matrix = marchantiales$comp_list,
    sel_factor = as.factor(marchantiales$metadata$species),
    sel_colors = marchantiales$metadata$color,
    components = nlevels(as.factor(marchantiales$metadata$species)) - 1
  )
)
#> [1] "Number of chosen components: 13"
Print selected variables and model metrics.
# Report how many distinct variables the PLS-DA selection kept.
print(
  paste(
    "Number of essential variables:",
    length(unique(unlist(sel_pls_comp_list[["_selected_variables_"]])))
  )
)
#> [1] "Number of essential variables: 791"
# List the variables kept by the PLS-DA selection.
print(sel_pls_comp_list[["_selected_variables_"]])
#> [1] "FT00040_neg" "FT00076_neg" "FT00094_neg" "FT00104_neg" "FT00340_neg"
#> [6] "FT00343_neg" "FT00352_neg" "FT00357_neg" "FT00508_neg" "FT00797_neg"
#> [11] "FT00968_neg" "FT01114_neg" "FT01116_neg" "FT01551_neg" "FT01576_neg"
#> [16] "FT01577_neg" "FT01949_neg" "FT02066_neg" "FT02233_neg" "FT02249_neg"
#> [21] "FT02335_neg" "FT02967_neg" "FT03304_neg" "FT03360_neg" "FT03364_neg"
#> [26] "FT03392_neg" "FT03416_neg" "FT03460_neg" "FT03571_neg" "FT03572_neg"
#> [31] "FT03665_neg" "FT03698_neg" "FT03756_pos" "FT03861_neg" "FT03900_neg"
#> [36] "FT04046_neg" "FT04572_neg" "FT04869_neg" "FT04870_neg" "FT05113_neg"
#> [41] "FT05196_neg" "FT05261_neg" "FT05511_neg" "FT05775_neg" "FT05894_neg"
#> [46] "FT05942_neg" "FT05954_neg" "FT06034_neg" "FT06531_neg" "FT06746_neg"
#> [51] "FT06981_neg" "FT07388_neg" "FT07390_neg" "FT07498_neg" "FT07566_neg"
#> [56] "FT07678_neg" "FT07721_neg" "FT07783_neg" "FT08088_neg" "FT08118_neg"
#> [61] "FT08177_neg" "FT08221_neg" "FT08329_neg" "FT08334_neg" "FT08629_neg"
#> [66] "FT08637_neg" "FT08752_neg" "FT08929_neg" "FT08996_neg" "FT09169_neg"
#> [71] "FT09386_neg" "FT09522_neg" "FT09736_neg" "FT10018_neg" "FT10317_neg"
#> [76] "FT10605_neg" "FT10619_neg" "FT10808_neg" "FT10810_neg" "FT11038_neg"
#> [81] "FT11649_neg" "FT11741_neg" "FT11848_neg" "FT11934_neg" "FT11990_neg"
#> [86] "FT12247_neg" "FT12249_neg" "FT12589_neg" "FT12590_neg" "FT12936_neg"
#> [91] "FT13008_neg" "FT13210_neg" "FT13346_neg" "FT13484_pos" "FT13762_neg"
#> [96] "FT13794_neg" "FT14654_neg" "FT15023_pos" "FT15325_neg" "FT15342_neg"
#> [101] "FT15608_neg" "FT15639_neg" "FT15657_neg" "FT16328_neg" "FT16528_neg"
#> [106] "FT16621_neg" "FT16722_neg" "FT00036_neg" "FT00071_neg" "FT00079_neg"
#> [111] "FT00118_neg" "FT00254_pos" "FT00670_neg" "FT01022_neg" "FT01246_neg"
#> [116] "FT01465_pos" "FT01801_pos" "FT01851_pos" "FT01879_pos" "FT02193_neg"
#> [121] "FT02672_neg" "FT03569_neg" "FT03704_neg" "FT03930_neg" "FT04015_neg"
#> [126] "FT04018_neg" "FT04181_neg" "FT04467_neg" "FT04468_neg" "FT04830_pos"
#> [131] "FT05099_pos" "FT05214_pos" "FT05288_neg" "FT05559_neg" "FT05811_neg"
#> [136] "FT06019_neg" "FT06022_neg" "FT06026_pos" "FT06201_neg" "FT06225_neg"
#> [141] "FT06228_neg" "FT06244_neg" "FT06441_neg" "FT06443_neg" "FT06486_neg"
#> [146] "FT06582_neg" "FT06584_neg" "FT06588_neg" "FT06738_neg" "FT06818_neg"
#> [151] "FT06923_pos" "FT07004_neg" "FT07069_neg" "FT07148_pos" "FT07249_pos"
#> [156] "FT07316_neg" "FT07337_pos" "FT07453_neg" "FT07617_neg" "FT07838_neg"
#> [161] "FT07888_neg" "FT07911_neg" "FT07945_neg" "FT08038_pos" "FT08156_pos"
#> [166] "FT08345_neg" "FT08410_pos" "FT08624_pos" "FT08756_neg" "FT08811_neg"
#> [171] "FT08812_neg" "FT08902_pos" "FT09165_neg" "FT09226_neg" "FT09260_neg"
#> [176] "FT09606_pos" "FT09608_pos" "FT09616_neg" "FT09684_neg" "FT09797_neg"
#> [181] "FT09839_neg" "FT09928_neg" "FT10161_neg" "FT10193_neg" "FT10752_pos"
#> [186] "FT11092_neg" "FT11352_pos" "FT11378_pos" "FT11408_neg" "FT11567_pos"
#> [191] "FT11869_pos" "FT11886_neg" "FT11887_neg" "FT11982_pos" "FT12072_pos"
#> [196] "FT12073_neg" "FT12098_pos" "FT12240_neg" "FT12242_neg" "FT12312_neg"
#> [201] "FT12685_neg" "FT12692_neg" "FT12700_neg" "FT12938_neg" "FT12992_neg"
#> [206] "FT13156_pos" "FT13236_neg" "FT13237_neg" "FT13238_neg" "FT13370_neg"
#> [211] "FT13539_neg" "FT13632_neg" "FT13657_pos" "FT14309_pos" "FT14320_pos"
#> [216] "FT14732_neg" "FT14734_neg" "FT14880_neg" "FT15057_neg" "FT15058_neg"
#> [221] "FT15086_pos" "FT15182_neg" "FT15288_neg" "FT15324_neg" "FT15326_neg"
#> [226] "FT15360_neg" "FT15385_neg" "FT15394_neg" "FT15443_neg" "FT15520_neg"
#> [231] "FT15521_neg" "FT15573_neg" "FT15597_neg" "FT15610_neg" "FT15673_neg"
#> [236] "FT15850_neg" "FT16204_neg" "FT16697_pos" "FT16737_pos" "FT16923_pos"
#> [241] "FT17152_pos" "FT17339_pos" "FT17715_pos" "FT17718_pos" "FT18046_pos"
#> [246] "FT18453_pos" "FT18534_pos" "FT18580_pos" "FT18754_pos" "FT18987_pos"
#> [251] "FT19729_pos" "FT21677_pos" "FT21964_pos" "FT22731_pos" "FT22732_pos"
#> [256] "FT23261_pos" "FT23265_pos" "FT23348_pos" "FT24203_pos" "FT24267_pos"
#> [261] "FT24686_pos" "FT24723_pos" "FT24985_pos" "FT25031_pos" "FT25477_pos"
#> [266] "FT26652_pos" "FT26885_pos" "FT27015_pos" "FT27144_pos" "FT27297_pos"
#> [271] "FT27427_pos" "FT27447_pos" "FT27547_pos" "FT27549_pos" "FT27557_pos"
#> [276] "FT27559_pos" "FT27591_pos" "FT27592_pos" "FT27835_pos" "FT00077_neg"
#> [281] "FT00185_pos" "FT00672_neg" "FT00920_neg" "FT03422_pos" "FT05919_pos"
#> [286] "FT08573_neg" "FT14511_pos" "FT17038_pos" "FT25023_pos" "FT01447_pos"
#> [291] "FT02328_neg" "FT08488_neg" "FT08838_neg" "FT10192_pos" "FT10492_pos"
#> [296] "FT12520_neg" "FT14406_pos" "FT14631_pos" "FT24023_pos" "FT00034_neg"
#> [301] "FT00086_neg" "FT00166_neg" "FT00350_pos" "FT00854_pos" "FT00870_pos"
#> [306] "FT00961_neg" "FT00972_neg" "FT00974_neg" "FT01109_neg" "FT01110_neg"
#> [311] "FT01241_neg" "FT01339_pos" "FT01442_neg" "FT01797_pos" "FT01821_neg"
#> [316] "FT01877_neg" "FT01896_pos" "FT01912_pos" "FT01927_pos" "FT02242_neg"
#> [321] "FT02374_pos" "FT02578_pos" "FT02583_pos" "FT02619_pos" "FT02637_pos"
#> [326] "FT02866_neg" "FT03287_neg" "FT03548_pos" "FT03655_neg" "FT03670_neg"
#> [331] "FT03690_neg" "FT04171_neg" "FT04540_pos" "FT05020_pos" "FT05083_neg"
#> [336] "FT05334_pos" "FT05446_neg" "FT05603_neg" "FT05642_neg" "FT05936_neg"
#> [341] "FT06252_neg" "FT06858_neg" "FT06905_pos" "FT06911_neg" "FT06952_pos"
#> [346] "FT07057_neg" "FT07069_pos" "FT07681_pos" "FT07729_pos" "FT07736_neg"
#> [351] "FT08107_neg" "FT08262_pos" "FT08271_neg" "FT08318_neg" "FT08616_neg"
#> [356] "FT09120_pos" "FT09668_neg" "FT09891_neg" "FT10059_neg" "FT10311_pos"
#> [361] "FT10528_neg" "FT11084_neg" "FT11175_pos" "FT11273_pos" "FT11696_neg"
#> [366] "FT12235_neg" "FT12685_pos" "FT13266_neg" "FT14402_pos" "FT16214_pos"
#> [371] "FT16284_pos" "FT16436_pos" "FT17785_pos" "FT17902_pos" "FT19884_pos"
#> [376] "FT20066_pos" "FT20454_pos" "FT22113_pos" "FT23166_pos" "FT24807_pos"
#> [381] "FT00038_neg" "FT00210_pos" "FT00356_neg" "FT00987_pos" "FT01066_neg"
#> [386] "FT01301_pos" "FT01461_neg" "FT01472_pos" "FT01474_pos" "FT01803_pos"
#> [391] "FT01829_pos" "FT01876_pos" "FT02111_neg" "FT02112_neg" "FT02171_pos"
#> [396] "FT02768_neg" "FT03114_neg" "FT03336_pos" "FT03497_neg" "FT03623_neg"
#> [401] "FT03701_neg" "FT03877_neg" "FT03879_neg" "FT04114_pos" "FT04399_pos"
#> [406] "FT04535_neg" "FT05746_pos" "FT06598_neg" "FT06731_neg" "FT07440_neg"
#> [411] "FT07503_pos" "FT07633_neg" "FT07887_neg" "FT07934_pos" "FT08076_neg"
#> [416] "FT08225_pos" "FT08574_neg" "FT08994_neg" "FT09217_pos" "FT09599_pos"
#> [421] "FT10159_pos" "FT11090_neg" "FT11576_neg" "FT11852_neg" "FT11885_neg"
#> [426] "FT11890_pos" "FT11938_neg" "FT12241_neg" "FT13069_neg" "FT13457_neg"
#> [431] "FT13609_neg" "FT14306_neg" "FT14580_neg" "FT15625_neg" "FT15641_neg"
#> [436] "FT16103_neg" "FT16206_neg" "FT16295_neg" "FT16675_neg" "FT16898_pos"
#> [441] "FT17035_neg" "FT17035_pos" "FT17730_pos" "FT18379_pos" "FT19596_pos"
#> [446] "FT21511_pos" "FT23650_pos" "FT23838_pos" "FT26263_pos" "FT26559_pos"
#> [451] "FT12348_pos" "FT00096_neg" "FT00290_neg" "FT00350_neg" "FT00486_pos"
#> [456] "FT00502_neg" "FT00639_neg" "FT00671_neg" "FT00675_neg" "FT00778_neg"
#> [461] "FT00866_neg" "FT01198_neg" "FT01310_neg" "FT01479_pos" "FT02164_pos"
#> [466] "FT02288_neg" "FT02388_neg" "FT02414_neg" "FT02619_neg" "FT03580_pos"
#> [471] "FT03752_pos" "FT04178_neg" "FT04220_neg" "FT04576_neg" "FT04926_neg"
#> [476] "FT05404_neg" "FT06202_pos" "FT06291_pos" "FT06310_pos" "FT06597_neg"
#> [481] "FT06665_neg" "FT07131_neg" "FT07141_neg" "FT07555_neg" "FT07572_neg"
#> [486] "FT08191_pos" "FT08907_pos" "FT09440_neg" "FT09627_neg" "FT09642_pos"
#> [491] "FT09688_neg" "FT10430_pos" "FT11495_pos" "FT11932_neg" "FT12246_neg"
#> [496] "FT13078_neg" "FT13479_neg" "FT13507_neg" "FT13875_neg" "FT14509_pos"
#> [501] "FT14753_pos" "FT15554_neg" "FT15628_pos" "FT16056_pos" "FT16701_pos"
#> [506] "FT16926_pos" "FT17010_pos" "FT17150_neg" "FT17414_pos" "FT17435_pos"
#> [511] "FT18538_pos" "FT18722_pos" "FT18903_pos" "FT19120_pos" "FT20413_pos"
#> [516] "FT21425_pos" "FT24480_pos" "FT24559_pos" "FT24655_pos" "FT00213_pos"
#> [521] "FT00270_pos" "FT00548_pos" "FT00869_neg" "FT00956_pos" "FT01138_pos"
#> [526] "FT01548_pos" "FT01875_pos" "FT01966_neg" "FT02339_neg" "FT02594_pos"
#> [531] "FT02826_neg" "FT02841_pos" "FT02918_pos" "FT03070_neg" "FT03334_pos"
#> [536] "FT03344_pos" "FT03410_pos" "FT03455_neg" "FT04411_pos" "FT04506_pos"
#> [541] "FT04792_pos" "FT04882_pos" "FT04983_pos" "FT05104_pos" "FT05128_neg"
#> [546] "FT05289_neg" "FT05383_pos" "FT05399_neg" "FT05949_neg" "FT06025_pos"
#> [551] "FT06029_neg" "FT06362_pos" "FT06420_pos" "FT06513_neg" "FT06698_pos"
#> [556] "FT06924_pos" "FT07150_pos" "FT07253_pos" "FT07256_neg" "FT07588_pos"
#> [561] "FT07593_pos" "FT07706_neg" "FT07785_pos" "FT07883_neg" "FT07982_pos"
#> [566] "FT08074_pos" "FT08252_neg" "FT08306_neg" "FT08398_neg" "FT08901_pos"
#> [571] "FT09060_neg" "FT09104_neg" "FT09574_pos" "FT09638_pos" "FT10034_neg"
#> [576] "FT10243_neg" "FT10634_neg" "FT10834_neg" "FT11405_neg" "FT11699_neg"
#> [581] "FT11709_neg" "FT11765_neg" "FT11767_neg" "FT11933_neg" "FT11936_neg"
#> [586] "FT12018_pos" "FT12046_pos" "FT12071_pos" "FT12134_neg" "FT12140_neg"
#> [591] "FT12239_neg" "FT12243_neg" "FT12251_neg" "FT12374_neg" "FT12490_pos"
#> [596] "FT12510_pos" "FT12775_neg" "FT13161_pos" "FT13207_pos" "FT13271_pos"
#> [601] "FT13409_pos" "FT13898_pos" "FT13903_pos" "FT13943_pos" "FT13944_pos"
#> [606] "FT14037_pos" "FT14667_neg" "FT14938_pos" "FT15037_pos" "FT15146_neg"
#> [611] "FT15188_neg" "FT15317_pos" "FT15331_neg" "FT15374_pos" "FT15541_pos"
#> [616] "FT15586_pos" "FT15607_neg" "FT15773_neg" "FT15773_pos" "FT15825_pos"
#> [621] "FT15932_neg" "FT16335_pos" "FT16343_pos" "FT16441_pos" "FT17115_pos"
#> [626] "FT17417_pos" "FT17470_pos" "FT17568_pos" "FT17670_pos" "FT17968_pos"
#> [631] "FT18324_pos" "FT18454_pos" "FT18488_pos" "FT18512_pos" "FT18702_pos"
#> [636] "FT19318_pos" "FT19516_pos" "FT19671_pos" "FT19738_pos" "FT19811_pos"
#> [641] "FT19864_pos" "FT20124_pos" "FT20125_pos" "FT20342_pos" "FT20394_pos"
#> [646] "FT20645_pos" "FT20764_pos" "FT20809_pos" "FT21027_pos" "FT21335_pos"
#> [651] "FT21504_pos" "FT21591_pos" "FT21612_pos" "FT21883_pos" "FT21965_pos"
#> [656] "FT21966_pos" "FT21967_pos" "FT22159_pos" "FT22384_pos" "FT22403_pos"
#> [661] "FT22480_pos" "FT22483_pos" "FT22552_pos" "FT23719_pos" "FT23733_pos"
#> [666] "FT23747_pos" "FT23790_pos" "FT23834_pos" "FT24098_pos" "FT24993_pos"
#> [671] "FT25024_pos" "FT25220_pos" "FT26753_pos" "FT27424_pos" "FT27430_pos"
#> [676] "FT00441_neg" "FT00489_pos" "FT01447_neg" "FT02249_pos" "FT02312_pos"
#> [681] "FT02374_neg" "FT02884_neg" "FT03356_neg" "FT04465_neg" "FT05303_pos"
#> [686] "FT05450_neg" "FT05777_neg" "FT05803_neg" "FT05924_pos" "FT06739_neg"
#> [691] "FT06817_neg" "FT07636_neg" "FT07859_neg" "FT08416_neg" "FT18377_pos"
#> [696] "FT21867_pos" "FT21942_pos" "FT00039_neg" "FT01749_neg" "FT01963_neg"
#> [701] "FT03858_neg" "FT03980_pos" "FT05951_neg" "FT06027_neg" "FT06084_neg"
#> [706] "FT06379_neg" "FT06742_neg" "FT07180_neg" "FT08483_neg" "FT09010_pos"
#> [711] "FT10583_pos" "FT11404_neg" "FT13384_neg" "FT15143_neg" "FT15145_neg"
#> [716] "FT15330_neg" "FT26742_pos" "FT00093_neg" "FT00101_neg" "FT01374_neg"
#> [721] "FT02473_pos" "FT04872_pos" "FT07890_pos" "FT10341_pos" "FT15030_pos"
#> [726] "FT22883_pos" "FT00098_neg" "FT00440_neg" "FT00511_neg" "FT00806_neg"
#> [731] "FT01228_neg" "FT01230_neg" "FT01591_neg" "FT01965_neg" "FT02022_neg"
#> [736] "FT02290_neg" "FT02332_neg" "FT02376_neg" "FT02910_neg" "FT02994_neg"
#> [741] "FT03074_neg" "FT03142_neg" "FT03188_neg" "FT03824_neg" "FT03931_neg"
#> [746] "FT04179_neg" "FT04182_neg" "FT04800_neg" "FT04823_neg" "FT04925_neg"
#> [751] "FT05087_neg" "FT05262_neg" "FT05367_neg" "FT05452_neg" "FT05716_neg"
#> [756] "FT05780_neg" "FT06203_neg" "FT06282_neg" "FT06559_neg" "FT06733_neg"
#> [761] "FT07096_neg" "FT07218_neg" "FT07885_neg" "FT08417_pos" "FT08759_neg"
#> [766] "FT08761_neg" "FT09926_neg" "FT11302_neg" "FT11303_neg" "FT11354_neg"
#> [771] "FT11449_neg" "FT11648_neg" "FT12043_neg" "FT12939_pos" "FT13217_neg"
#> [776] "FT13420_neg" "FT13612_pos" "FT14178_neg" "FT14455_neg" "FT14565_neg"
#> [781] "FT14653_neg" "FT14701_neg" "FT14785_neg" "FT15165_neg" "FT15893_neg"
#> [786] "FT15980_neg" "FT16889_neg" "FT18536_pos" "FT19438_pos" "FT20023_pos"
#> [791] "FT24513_pos"
# Show the multiclass metrics stored with the PLS-DA result.
print(sel_pls_comp_list[["_multiclass_metrics_"]])
#> $accuracy
#> [1] 1
#>
#> $balanced.accuracy
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $DOR
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> Inf Inf Inf Inf Inf
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> Inf Inf Inf Inf Inf
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> Inf Inf Inf Inf
#>
#> $error.rate
#> [1] 0
#>
#> $F0.5
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $F1
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $F2
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $FDR
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 0 0 0 0 0
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 0 0 0 0 0
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 0 0 0 0
#>
#> $FNR
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 0 0 0 0 0
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 0 0 0 0 0
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 0 0 0 0
#>
#> $FOR
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 0 0 0 0 0
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 0 0 0 0 0
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 0 0 0 0
#>
#> $FPR
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 0 0 0 0 0
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 0 0 0 0 0
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 0 0 0 0
#>
#> $geometric.mean
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $Jaccard
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $L
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> Inf Inf Inf Inf Inf
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> Inf Inf Inf Inf Inf
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> Inf Inf Inf Inf
#>
#> $lambda
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 0 0 0 0 0
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 0 0 0 0 0
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 0 0 0 0
#>
#> $MCC
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $MK
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $NPV
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $OP
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $precision
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $recall
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $specificity
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $Youden
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
# Show the R-squared value stored with the PLS-DA result.
print(sel_pls_comp_list[["_model_r2_"]])
#> Rsquared
#> 1
Draw heatmap.
# Heatmap of the variables selected by PLS-DA. The sample colours are taken
# from marchantiales$metadata$color, the same vector that the other calls in
# this document pass as sample_colors / sel_colors.
# NOTE(review): this call previously passed marchantiales$colors; confirm that
# no separate `colors` element was intended.
heatmap.selected_features(
  feat_list = marchantiales$comp_list,
  sel_feat = sel_pls_comp_list$`_selected_variables_`,
  sample_colors = marchantiales$metadata$color,
  plot_width = 10,
  plot_height = 10,
  filename = NULL,
  main = "PLS-DA"
)
Now, use Random Forest to select for essential variables. RF will also be used for the other abstraction levels.
# Random Forest feature selection on the comp_list matrix, grouped by species.
# Warnings raised during the call are silenced.
sel_rf_comp_list <- suppressWarnings(
  select_features_random_forest(
    feat_matrix = marchantiales$comp_list,
    sel_factor = as.factor(marchantiales$metadata$species),
    sel_colors = marchantiales$metadata$color,
    tune_length = 10,
    quantile_threshold = 0.95,
    plot_roc_filename = NULL
  )
)
Print selected variables and model metrics.
# Report how many distinct variables the Random Forest selection kept.
print(
  paste(
    "Number of essential variables:",
    length(unique(unlist(sel_rf_comp_list[["_selected_variables_"]])))
  )
)
#> [1] "Number of essential variables: 178"
# List the variables kept by the Random Forest selection.
print(sel_rf_comp_list[["_selected_variables_"]])
#> [1] "FT00731_neg" "FT00949_neg" "FT01576_neg" "FT01637_neg" "FT02335_neg"
#> [6] "FT03195_neg" "FT05134_neg" "FT05511_neg" "FT05882_neg" "FT08334_neg"
#> [11] "FT11246_neg" "FT11756_neg" "FT14063_neg" "FT00065_neg" "FT00767_neg"
#> [16] "FT01246_neg" "FT01317_pos" "FT02412_neg" "FT02482_neg" "FT03182_neg"
#> [21] "FT04167_neg" "FT04335_pos" "FT04839_pos" "FT05764_pos" "FT06355_neg"
#> [26] "FT06923_pos" "FT07069_neg" "FT07227_pos" "FT07307_neg" "FT07448_neg"
#> [31] "FT07947_pos" "FT09074_neg" "FT09260_neg" "FT10307_pos" "FT12126_pos"
#> [36] "FT12495_pos" "FT12737_neg" "FT13156_pos" "FT13236_neg" "FT13611_neg"
#> [41] "FT15597_neg" "FT17340_pos" "FT18974_pos" "FT20130_pos" "FT20623_pos"
#> [46] "FT21504_pos" "FT22558_pos" "FT23905_pos" "FT25471_pos" "FT27295_pos"
#> [51] "FT27591_pos" "FT28080_pos" "FT28171_pos" "FT00205_pos" "FT00319_neg"
#> [56] "FT00769_neg" "FT00770_neg" "FT01604_neg" "FT04416_pos" "FT07710_pos"
#> [61] "FT08793_pos" "FT10029_pos" "FT19424_pos" "FT00676_neg" "FT02047_neg"
#> [66] "FT02324_pos" "FT03122_neg" "FT04692_pos" "FT04787_neg" "FT07551_neg"
#> [71] "FT08787_neg" "FT11859_neg" "FT00034_neg" "FT01821_neg" "FT02374_pos"
#> [76] "FT02716_pos" "FT02866_neg" "FT04637_neg" "FT05773_pos" "FT06905_pos"
#> [81] "FT06952_pos" "FT22113_pos" "FT00821_neg" "FT01066_neg" "FT01552_neg"
#> [86] "FT02321_neg" "FT04424_pos" "FT06181_neg" "FT06723_neg" "FT06940_pos"
#> [91] "FT07221_neg" "FT08051_neg" "FT08865_pos" "FT09155_neg" "FT09623_pos"
#> [96] "FT09713_neg" "FT09740_pos" "FT11003_neg" "FT13232_pos" "FT13485_pos"
#> [101] "FT13851_pos" "FT14580_neg" "FT15625_neg" "FT15847_neg" "FT18419_pos"
#> [106] "FT18499_pos" "FT19431_pos" "FT20331_pos" "FT20555_pos" "FT00185_pos"
#> [111] "FT01068_pos" "FT01124_pos" "FT01221_pos" "FT05995_pos" "FT12976_pos"
#> [116] "FT16905_pos" "FT17246_neg" "FT00708_pos" "FT03418_pos" "FT04480_pos"
#> [121] "FT05010_pos" "FT08981_neg" "FT12265_pos" "FT14768_pos" "FT16221_pos"
#> [126] "FT00434_pos" "FT00758_neg" "FT01473_neg" "FT02753_neg" "FT02965_neg"
#> [131] "FT05058_pos" "FT09440_neg" "FT16219_neg" "FT00396_pos" "FT04122_pos"
#> [136] "FT04444_pos" "FT06669_pos" "FT06891_pos" "FT06924_pos" "FT08517_neg"
#> [141] "FT11405_neg" "FT12746_pos" "FT29035_pos" "FT04127_pos" "FT05405_pos"
#> [146] "FT05761_pos" "FT08059_pos" "FT14344_pos" "FT17028_pos" "FT18800_pos"
#> [151] "FT21942_pos" "FT22236_pos" "FT03559_neg" "FT03858_neg" "FT05760_pos"
#> [156] "FT06027_neg" "FT06707_neg" "FT08485_pos" "FT17345_pos" "FT17937_pos"
#> [161] "FT22743_pos" "FT00037_neg" "FT00995_pos" "FT02849_pos" "FT03698_pos"
#> [166] "FT04499_pos" "FT04805_pos" "FT14841_pos" "FT22131_pos" "FT00701_pos"
#> [171] "FT02579_pos" "FT08242_pos" "FT08417_pos" "FT09562_pos" "FT10722_neg"
#> [176] "FT10852_pos" "FT11606_pos" "FT14519_pos"
# Show the multiclass metrics stored with the Random Forest result.
print(sel_rf_comp_list[["_multiclass_metrics_"]])
#> $accuracy
#> [1] 1
#>
#> $balanced.accuracy
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $DOR
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> Inf Inf Inf Inf Inf
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> Inf Inf Inf Inf Inf
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> Inf Inf Inf Inf
#>
#> $error.rate
#> [1] 0
#>
#> $F0.5
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $F1
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $F2
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $FDR
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 0 0 0 0 0
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 0 0 0 0 0
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 0 0 0 0
#>
#> $FNR
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 0 0 0 0 0
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 0 0 0 0 0
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 0 0 0 0
#>
#> $FOR
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 0 0 0 0 0
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 0 0 0 0 0
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 0 0 0 0
#>
#> $FPR
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 0 0 0 0 0
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 0 0 0 0 0
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 0 0 0 0
#>
#> $geometric.mean
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $Jaccard
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $L
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> Inf Inf Inf Inf Inf
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> Inf Inf Inf Inf Inf
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> Inf Inf Inf Inf
#>
#> $lambda
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 0 0 0 0 0
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 0 0 0 0 0
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 0 0 0 0
#>
#> $MCC
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $MK
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $NPV
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $OP
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $precision
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $recall
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $specificity
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
#>
#> $Youden
#> A.gracilis A.hyalina M.fragrans R.beyrichiana R.bifurca
#> 1 1 1 1 1
#> R.canaliculata R.cavernosa R.ciliifera R.gothica R.gougetiana
#> 1 1 1 1 1
#> R.hemisphaerica R.huebeneriana R.sorocarpa R.subbifurca
#> 1 1 1 1
# Show the R-squared value stored with the Random Forest result.
print(sel_rf_comp_list[["_model_r2_"]])
#> Rsquared
#> 1
Draw heatmap.
# Heatmap of the variables selected by Random Forest, with the sample colours
# taken from the metadata.
heatmap.selected_features(
  feat_list = marchantiales$comp_list,
  sel_feat = sel_rf_comp_list$`_selected_variables_`,
  sample_colors = marchantiales$metadata$color,
  plot_width = 8,
  plot_height = 6,
  cex_col = 0.1,
  cex_row = 0.4,
  filename = NULL,
  main = "Random Forest"
)
Draw interactive heatmap. In the following we will only plot the interactive heatmap.
library(heatmaply)
# Interactive heatmap of the centred and scaled columns of comp_list that
# belong to the Random-Forest-selected variables.
# drop = FALSE keeps the subset a matrix (and keeps its column name) even when
# only a single variable was selected; without it the subset would collapse to
# a plain vector before scale() is applied.
heatmaply(
  scale(
    marchantiales$comp_list[, which(colnames(marchantiales$comp_list) %in% sel_rf_comp_list$`_selected_variables_`), drop = FALSE]
  ),
  k_row = 1,
  k_col = 1,
  colors = colorRampPalette(c('darkblue','white','darkred'), alpha=0.1, bias=1)(256),
  file = NULL,
  selfcontained = TRUE,
  fontsize_row = 6,
  fontsize_col = 3
)
Use Random Forest to select for essential variables.
# Random Forest feature selection on the class_list matrix, grouped by species.
# Warnings raised during the call are silenced.
sel_rf_class_list <- suppressWarnings(
  select_features_random_forest(
    feat_matrix = marchantiales$class_list,
    sel_factor = as.factor(marchantiales$metadata$species),
    sel_colors = marchantiales$metadata$color,
    tune_length = 10,
    quantile_threshold = 0.95,
    plot_roc_filename = NULL
  )
)
Print selected variables and R-squared.
# Report how many distinct class_list variables the Random Forest selection kept.
print(
  paste(
    "Number of essential variables:",
    length(unique(unlist(sel_rf_class_list[["_selected_variables_"]])))
  )
)
#> [1] "Number of essential variables: 113"
# List the class_list variables kept by the Random Forest selection.
print(sel_rf_class_list[["_selected_variables_"]])
#> [1] "Diarylthioethers"
#> [2] "Disaccharides"
#> [3] "Diterpene.glycosides"
#> [4] "Guanidines"
#> [5] "Hydrolyzable.tannins"
#> [6] "Quinoline.carboxylic.acids"
#> [7] "Tannins"
#> [8] "Tyrosols"
#> [9] "X1.1Z.alkenyl.2.acylglycerophosphoethanolamines"
#> [10] "X5.deoxyribonucleosides"
#> [11] "Anthracenecarboxylic.acids"
#> [12] "Aromatic.monoterpenoids"
#> [13] "Aryl.fluorides"
#> [14] "Benzamides"
#> [15] "Dithioles"
#> [16] "Glycerophospholipids"
#> [17] "Nitrobenzoic.acids.and.derivatives"
#> [18] "Oligosaccharides"
#> [19] "Pyrimidine.ribonucleoside.diphosphates"
#> [20] "Triterpenoids"
#> [21] "Alkyl.aryl.ethers"
#> [22] "Aminopyrimidines.and.derivatives"
#> [23] "Cyclic.depsipeptides"
#> [24] "Fluorobenzenes"
#> [25] "Pteridines.and.derivatives"
#> [26] "Purine.3.deoxyribonucleosides"
#> [27] "Pyrimidine.ribonucleoside.monophosphates"
#> [28] "Tetraalkylammonium.salts"
#> [29] "X4.hydroxyflavonoids"
#> [30] "Alkoxy.S.triazines"
#> [31] "Alkylarylthioethers"
#> [32] "Ceramides"
#> [33] "X5.deoxy.5.thionucleosides"
#> [34] "X7.hydroxysteroids"
#> [35] "Alkyl.phosphates"
#> [36] "Azobenzenes"
#> [37] "Carbonyl.compounds"
#> [38] "Gamma.keto.acids.and.derivatives"
#> [39] "Methoxybenzoic.acids.and.derivatives"
#> [40] "Methoxyphenols"
#> [41] "Organic.disulfides"
#> [42] "Piperidines"
#> [43] "Pyranoflavonoids"
#> [44] "X6.aminopurines"
#> [45] "Aldehydes"
#> [46] "Benzenediols"
#> [47] "Benzo.1.4.dioxanes"
#> [48] "Flavonoid.glycosides"
#> [49] "Long.chain.fatty.acids"
#> [50] "Nitrogen.mustard.compounds"
#> [51] "Organooxygen.compounds"
#> [52] "Phenylsulfates"
#> [53] "Saxitoxins..gonyautoxins..and.derivatives"
#> [54] "X1.hydroxy.2.unsubstituted.benzenoids"
#> [55] "Acyclic.olefins"
#> [56] "Boronic.acid.esters"
#> [57] "Chlorohydrins"
#> [58] "Halopyrimidines"
#> [59] "Medium.chain.keto.acids.and.derivatives"
#> [60] "Purine.nucleosides"
#> [61] "Purine.ribonucleoside.monophosphates"
#> [62] "Purinones"
#> [63] "Thiophene.carboxylic.acids.and.derivatives"
#> [64] "Alkyl.fluorides"
#> [65] "Fatty.alcohols"
#> [66] "Peptides"
#> [67] "Styrenes"
#> [68] "X2.4.disubstituted.thiazoles"
#> [69] "Benzylamines"
#> [70] "Flavonoid.O.glycosides"
#> [71] "Isoflavonoid.O.glycosides"
#> [72] "Organic.phosphoric.acids.and.derivatives"
#> [73] "Pregnane.type.alkaloids"
#> [74] "Thiazoles"
#> [75] "X1.benzopyrans"
#> [76] "Alkaloids.and.derivatives"
#> [77] "Benzenoids"
#> [78] "Branched.alkanes"
#> [79] "Cardenolides.and.derivatives"
#> [80] "Indoles"
#> [81] "Ortho.amides"
#> [82] "Propargyl.type.1.3.dipolar.organic.compounds"
#> [83] "Amino.acids.and.derivatives"
#> [84] "Azasteroids.and.derivatives"
#> [85] "Heteroaromatic.compounds"
#> [86] "Hybrid.peptides"
#> [87] "Hydroxy.fatty.acids"
#> [88] "Purines.and.purine.derivatives"
#> [89] "Sugar.alcohols"
#> [90] "Tetrapyrroles.and.derivatives"
#> [91] "Aminobenzenesulfonamides"
#> [92] "Aromatic.anilides"
#> [93] "Azoles"
#> [94] "Catechols"
#> [95] "Lipids.and.lipid.like.molecules"
#> [96] "Steroids.and.steroid.derivatives"
#> [97] "X3.5.cyclic.purine.nucleotides"
#> [98] "Biphenyls.and.derivatives"
#> [99] "Hydrazines.and.derivatives"
#> [100] "Organic.phosphonic.acids"
#> [101] "Orthocarboxylic.acid.derivatives"
#> [102] "Podophyllotoxins"
#> [103] "Pyrimidones"
#> [104] "Pyrroloindoles"
#> [105] "Sulfenyl.compounds"
#> [106] "X6.O.methylated.flavonoids"
#> [107] "Alkyl.sulfates"
#> [108] "Diazines"
#> [109] "Dicarboxylic.acids.and.derivatives"
#> [110] "Furofuran.lignans"
#> [111] "Indoles.and.derivatives"
#> [112] "Prenol.lipids"
#> [113] "Primary.amines"
#print(sel_rf_class_list$`_multiclass_metrics_`)
print(sel_rf_class_list$`_model_r2_`)
#> Rsquared
#> 1
Draw interactive heatmap.
Use Random Forest to select for essential variables.
suppressWarnings(
sel_rf_subclass_list <- select_features_random_forest(feat_matrix=marchantiales$subclass_list, sel_factor=as.factor(marchantiales$metadata$species), sel_colors=marchantiales$metadata$color, tune_length=10, quantile_threshold=0.95, plot_roc_filename=NULL)
)
Print selected variables and R-squared.
print(paste("Number of essential variables:", length(unique(unlist(sel_rf_subclass_list$`_selected_variables_`)))))
#> [1] "Number of essential variables: 70"
print(sel_rf_subclass_list$`_selected_variables_`)
#> [1] "Anthracenes"
#> [2] "Glycerophospholipids"
#> [3] "Harmala.alkaloids"
#> [4] "Organic.compounds"
#> [5] "Organic.nitrogen.compounds"
#> [6] "Organonitrogen.compounds"
#> [7] "Organooxygen.compounds"
#> [8] "Purine.nucleosides"
#> [9] "Tannins"
#> [10] "X5.deoxyribonucleosides"
#> [11] "Dihydrofurans"
#> [12] "Lignans..neolignans.and.related.compounds"
#> [13] "Nucleosides..nucleotides..and.analogues"
#> [14] "Organic.acids.and.derivatives"
#> [15] "Phenol.ethers"
#> [16] "Pteridines.and.derivatives"
#> [17] "Purine.nucleotides"
#> [18] "Pyrans"
#> [19] "X5.5.dinucleotides"
#> [20] "Benzenoids"
#> [21] "Diarylheptanoids"
#> [22] "Diazanaphthalenes"
#> [23] "Furans"
#> [24] "Organic.carbonic.acids.and.derivatives"
#> [25] "Organic.phosphoric.acids.and.derivatives"
#> [26] "Organophosphorus.compounds"
#> [27] "Pyrimidine.nucleosides"
#> [28] "Benzopyrans"
#> [29] "Carboxylic.acids.and.derivatives"
#> [30] "Stilbenes"
#> [31] "Allyl.type.1.3.dipolar.organic.compounds"
#> [32] "Azobenzenes"
#> [33] "Organic.disulfides"
#> [34] "Oxanes"
#> [35] "Piperidines"
#> [36] "Benzodioxanes"
#> [37] "Ergoline.and.derivatives"
#> [38] "Peptidomimetics"
#> [39] "Saxitoxins..gonyautoxins..and.derivatives"
#> [40] "Sulfonyls"
#> [41] "Alkaloids.and.derivatives"
#> [42] "Boronic.acid.derivatives"
#> [43] "Diazines"
#> [44] "Glycerolipids"
#> [45] "Halohydrins"
#> [46] "Keto.acids.and.derivatives"
#> [47] "Organic.oxygen.compounds"
#> [48] "Benzene.and.substituted.derivatives"
#> [49] "Prenol.lipids"
#> [50] "Tetracyclines"
#> [51] "Aryl.halides"
#> [52] "Tetrapyrroles.and.derivatives"
#> [53] "Biotin.and.derivatives"
#> [54] "Lupin.alkaloids"
#> [55] "Organoheterocyclic.compounds"
#> [56] "Oxacyclic.compounds"
#> [57] "Propargyl.type.1.3.dipolar.organic.compounds"
#> [58] "Saturated.hydrocarbons"
#> [59] "Heteroaromatic.compounds"
#> [60] "Steroids.and.steroid.derivatives"
#> [61] "Triazines"
#> [62] "Fatty.Acyls"
#> [63] "Dioxaborolanes"
#> [64] "Flavonoids"
#> [65] "Organic.phosphonic.acids.and.derivatives"
#> [66] "Orthocarboxylic.acid.derivatives"
#> [67] "Pyrroles"
#> [68] "Furanoid.lignans"
#> [69] "Indoles.and.derivatives"
#> [70] "Organohalogen.compounds"
#print(sel_rf_subclass_list$`_multiclass_metrics_`)
print(sel_rf_subclass_list$`_model_r2_`)
#> Rsquared
#> 0.9643195
Draw interactive heatmap.
heatmaply(scale(marchantiales$subclass_list[, which(colnames(marchantiales$subclass_list) %in% sel_rf_subclass_list$`_selected_variables_`)]), k_row=1, k_col=1, colors=colorRampPalette(c('darkblue','white','darkred'), alpha=0.1, bias=1)(256), file=NULL, selfcontained=TRUE, fontsize_row=6, fontsize_col=3)
Use Random Forest to select for essential variables.
suppressWarnings(
sel_rf_superclass_list <- select_features_random_forest(feat_matrix=marchantiales$superclass_list, sel_factor=as.factor(marchantiales$metadata$species), sel_colors=marchantiales$metadata$color, tune_length=10, quantile_threshold=0.95, plot_roc_filename=NULL)
)
Print selected variables and R-squared.
print(paste("Number of essential variables:", length(unique(unlist(sel_rf_superclass_list$`_selected_variables_`)))))
#> [1] "Number of essential variables: 16"
print(sel_rf_superclass_list$`_selected_variables_`)
#> [1] "Lignans..neolignans.and.related.compounds"
#> [2] "Lipids.and.lipid.like.molecules"
#> [3] "Nucleosides..nucleotides..and.analogues"
#> [4] "Organic.acids.and.derivatives"
#> [5] "Organic.compounds"
#> [6] "Organic.nitrogen.compounds"
#> [7] "Organic.oxygen.compounds"
#> [8] "Organoheterocyclic.compounds"
#> [9] "Organosulfur.compounds"
#> [10] "Phenylpropanoids.and.polyketides"
#> [11] "Benzenoids"
#> [12] "Organophosphorus.compounds"
#> [13] "Hydrocarbons"
#> [14] "Organic.1.3.dipolar.compounds"
#> [15] "Organohalogen.compounds"
#> [16] "Alkaloids.and.derivatives"
#print(sel_rf_superclass_list$`_multiclass_metrics_`)
print(sel_rf_superclass_list$`_model_r2_`)
#> Rsquared
#> 0.7605452
Draw interactive heatmap.
heatmaply(scale(marchantiales$superclass_list[, which(colnames(marchantiales$superclass_list) %in% sel_rf_superclass_list$`_selected_variables_`)]), k_row=1, k_col=1, colors=colorRampPalette(c('darkblue','white','darkred'), alpha=0.1, bias=1)(256), file=NULL, selfcontained=TRUE, fontsize_row=6, fontsize_col=3)
Use Random Forest to select for essential variables.
suppressWarnings(
sel_rf_npclass_list <- select_features_random_forest(feat_matrix=marchantiales$npclass_list, sel_factor=as.factor(marchantiales$metadata$species), sel_colors=marchantiales$metadata$color, tune_length=10, quantile_threshold=0.95, plot_roc_filename=NULL)
)
Print selected variables and R-squared.
print(paste("Number of essential variables:", length(unique(unlist(sel_rf_npclass_list$`_selected_variables_`)))))
#> [1] "Number of essential variables: 84"
print(sel_rf_npclass_list$`_selected_variables_`)
#> [1] "Ascarosides"
#> [2] "Ceramides"
#> [3] "Disaccharides"
#> [4] "Flavones"
#> [5] "Gallotannins"
#> [6] "Phenazine.alkaloids"
#> [7] "Phenylethanoids"
#> [8] "Polyamines"
#> [9] "Polysaccharides"
#> [10] "Tropane.alkaloids"
#> [11] "Aminosugars"
#> [12] "Anthraquinones.and.anthrones"
#> [13] "Cyclic.peptides"
#> [14] "Glycerophosphoinositols"
#> [15] "Norlabdane.diterpenoids"
#> [16] "pteridine.alkaloids"
#> [17] "Pulvinones"
#> [18] "Secoiridoid.monoterpenoids"
#> [19] "Triacylglycerols"
#> [20] "Aminoglycosides"
#> [21] "Glycosylmonoacylglycerols"
#> [22] "Irregular.monoterpenoids"
#> [23] "Kaurane.and.Phyllocladane.diterpenoids"
#> [24] "Linear.peptides"
#> [25] "Open.chain.polyketides"
#> [26] "Purine.alkaloids"
#> [27] "Pyrrole.alkaloids"
#> [28] "Saxitoxins"
#> [29] "Oxidized.glycerophospholipids"
#> [30] "Simple.indole.alkaloids"
#> [31] "Simple.phenolic.acids"
#> [32] "Tripeptides"
#> [33] "X2.pyrone.derivatives"
#> [34] "Amino.fatty.acids"
#> [35] "Betalain.alkaloids"
#> [36] "Flavanones"
#> [37] "Polyene.macrolides"
#> [38] "Yohimbine.like.alkaloids"
#> [39] "Isoquinoline.alkaloids"
#> [40] "Lactones"
#> [41] "Megastigmanes"
#> [42] "Phoslactomycins.or.Phosphazomycins"
#> [43] "Proanthocyanins"
#> [44] "Purine.nucleostides"
#> [45] "Unsaturated.fatty.acids"
#> [46] "Cephalosporins"
#> [47] "Fatty.alcohols"
#> [48] "Fatty.aldehydes"
#> [49] "Monosaccharides"
#> [50] "Oligomycins"
#> [51] "X3.Spirotetramic.acids"
#> [52] "Carboline.alkaloids"
#> [53] "Cinnamic.acids.and.derivatives"
#> [54] "Limonoids"
#> [55] "Tetracyclines"
#> [56] "Abietane.diterpenoids"
#> [57] "Cardenolides"
#> [58] "Coumaronochromones"
#> [59] "Daucane.sesquiterpenoids"
#> [60] "Flavonols"
#> [61] "Isoflavones"
#> [62] "Phenoxazine.alkaloids"
#> [63] "RiPPs.Cyanobactins"
#> [64] "Apocarotenoids.β."
#> [65] "Flavan.3.ols"
#> [66] "Indole.diketopiperazine.alkaloids.L.Trp..L.Ala"
#> [67] "Isoindole.alkaloids"
#> [68] "Phenylethylamines"
#> [69] "Simple.aromatic.polyketides"
#> [70] "Cholestane.steroids"
#> [71] "Other.Octadecanoids"
#> [72] "Depsides"
#> [73] "Dipeptides"
#> [74] "Ergostane.steroids"
#> [75] "N.acyl.amines"
#> [76] "Pinane.monoterpenoids"
#> [77] "Amino.cyclitols"
#> [78] "Depsipeptides"
#> [79] "Fatty.acyl.carnitines"
#> [80] "Naphthoquinones"
#> [81] "Boromycins"
#> [82] "Furofuranoid.lignans"
#> [83] "Hydrocarbons"
#> [84] "Oligomeric.stibenes"
#print(sel_rf_npclass_list$`_multiclass_metrics_`)
print(sel_rf_npclass_list$`_model_r2_`)
#> Rsquared
#> 0.97691
Draw interactive heatmap.
heatmaply(scale(marchantiales$npclass_list[, which(colnames(marchantiales$npclass_list) %in% sel_rf_npclass_list$`_selected_variables_`)]), k_row=1, k_col=1, colors=colorRampPalette(c('darkblue','white','darkred'), alpha=0.1, bias=1)(256), file=NULL, selfcontained=TRUE, fontsize_row=6, fontsize_col=3)
Use Random Forest to select for essential variables.
suppressWarnings(
sel_rf_nppathway_list <- select_features_random_forest(feat_matrix=marchantiales$nppathway_list, sel_factor=as.factor(marchantiales$metadata$species), sel_colors=marchantiales$metadata$color, tune_length=10, quantile_threshold=0.95, plot_roc_filename=NULL)
)
#> note: only 6 unique complexity parameters in default grid. Truncating the grid to 6 .
Print selected variables and R-squared.
print(paste("Number of essential variables:", length(unique(unlist(sel_rf_nppathway_list$`_selected_variables_`)))))
#> [1] "Number of essential variables: 7"
print(sel_rf_nppathway_list$`_selected_variables_`)
#> [1] "Alkaloids" "Amino.acids.and.Peptides"
#> [3] "Carbohydrates" "Fatty.acids"
#> [5] "Polyketides" "Shikimates.and.Phenylpropanoids"
#> [7] "Terpenoids"
#print(sel_rf_nppathway_list$`_multiclass_metrics_`)
print(sel_rf_nppathway_list$`_model_r2_`)
#> Rsquared
#> 0.2850012
Draw interactive heatmap.
heatmaply(scale(marchantiales$nppathway_list[, which(colnames(marchantiales$nppathway_list) %in% sel_rf_nppathway_list$`_selected_variables_`)]), k_row=1, k_col=1, colors=colorRampPalette(c('darkblue','white','darkred'), alpha=0.1, bias=1)(256), file=NULL, selfcontained=TRUE, fontsize_row=6, fontsize_col=3)
Use Random Forest to select for essential variables.
suppressWarnings(
sel_rf_mdes_list <- select_features_random_forest(feat_matrix=marchantiales$mdes_list, sel_factor=as.factor(marchantiales$metadata$species), sel_colors=marchantiales$metadata$color, tune_length=10, quantile_threshold=0.95, plot_roc_filename=NULL)
)
Print selected variables and R-squared.
print(paste("Number of essential variables:", length(unique(unlist(sel_rf_mdes_list$`_selected_variables_`)))))
#> [1] "Number of essential variables: 45"
print(sel_rf_mdes_list$`_selected_variables_`)
#> [1] "C2SP1" "khs.ddsN" "khs.dNH" "khs.dssS" "khs.sNH3" "khs.sssN"
#> [7] "khs.tsC" "MDEC.44" "nC" "SCH.4" "khs.ddC" "khs.sPH2"
#> [13] "khs.sSH" "khs.sssNH" "nD" "VCH.4" "khs.tCH" "nR"
#> [19] "XLogP" "khs.ssS" "nRings4" "khs.aaO" "khs.dCH2" "khs.ssssB"
#> [25] "VCH.3" "ALogP" "khs.sCl" "khs.sF" "khs.ssNH2" "ATSc3"
#> [31] "khs.aaaC" "khs.sBr" "khs.ssssN" "nAtomP" "nM" "C1SP1"
#> [37] "khs.dS" "MDEO.22" "nAcid" "khs.sssB" "MDEN.11" "nN"
#> [43] "nRings7" "khs.aaNH" "khs.tN"
#print(sel_rf_mdes_list$`_multiclass_metrics_`)
print(sel_rf_mdes_list$`_model_r2_`)
#> Rsquared
#> 0.5718747
Draw interactive heatmap.
Annotate peak tables in negative ion mode.
f.export_maf(cbind(ms1_def_neg, t(feat_list_neg)), "data/metabolites_maf_neg.tsv")
f.annotate_maf_classes(maf_input="data/metabolites_maf_neg.tsv", maf_output="data/metabolites_maf_neg_classes.tsv")
f.annotate_maf_compounds(maf_input="data/metabolites_maf_neg_classes.tsv", maf_output="data/m_MTBLS2239_LC-MS_negative_reverse-phase_metabolite_profiling_v2_maf.tsv", polarity="neg", xcms_id=rownames(ms1_def_neg), pol_mode=rep("neg",nrow(ms1_def_neg)), smiles=ms1_def_neg$smiles, names=ms1_def_neg$name)
Annotate peak tables in positive ion mode.
f.export_maf(cbind(ms1_def_pos, t(feat_list_pos)), "data/metabolites_maf_pos.tsv")
f.annotate_maf_classes(maf_input="data/metabolites_maf_pos.tsv", maf_output="data/metabolites_maf_pos_classes.tsv")
f.annotate_maf_compounds(maf_input="data/metabolites_maf_pos_classes.tsv", maf_output="data/m_MTBLS2239_LC-MS_positive_reverse-phase_metabolite_profiling_v2_maf.tsv", polarity="pos", xcms_id=rownames(ms1_def_pos), pol_mode=rep("pos",nrow(ms1_def_pos)), smiles=ms1_def_pos$smiles, names=ms1_def_pos$name)